"""author: wenyao
   date: 2020/11/11
   project: ludingji
"""

import re
import requests
# Download the homepage once; the link-extraction code further down reads
# `ret.text`. A timeout is passed because requests otherwise waits without
# limit on an unresponsive server, which would hang the whole script.
ret = requests.get('https://www.sucai8.com', timeout=10)
#print(ws.text)
# ret = re.findall(r"<img",ws.text,re.M)
#msg= '<a href="/u/admin/" class="fl" target="_blank"><img class="radius100" width="36" height="36" src="/uploads/userup/1/myface.jpg" /></a>'

#msg = '<div style="background-color:#580094;"><a href="http://www.xunmoban.com" title="幻灯片1" data-id="1" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2152ZUX.jpg" alt="幻灯片1" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片1</div></a></div><div style="background-color:#771d98;"><a href="http://www.xunmoban.com" title="幻灯片2" data-id="2" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ215294I23.jpg" alt="幻灯片2" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片2</div></a></div><div style="background-color:#241f1b;"><a href="http://www.xunmoban.com" title="幻灯片3" data-id="3" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2154641G8.jpg" alt="幻灯片3" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片3</div></a></div><div style="background-color:#fbc9c2;"><a href="http://www.xunmoban.com" title="幻灯片4" data-id="4" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2154K2926.jpg" alt="幻灯片4" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片4</div></a></div><div style="background-color:#4dbff9;"><a href="http://www.xunmoban.com" title="幻灯片5" data-id="5" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2154S0503.jpg" alt="幻灯片5" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片5</div></a></div><div style="background-color:#010101;"><a href="http://www.xunmoban.com" title="幻灯片6" data-id="6" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2154ZW47.jpg" alt="幻灯片6" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片6</div></a></div><div style="background-color:#fee801;"><a href="http://www.xunmoban.com" title="幻灯片7" data-id="7" 
# target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2144405353.jpg" alt="幻灯片7" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片7</div></a></div><div style="background-color:#000116;"><a href="http://www.xunmoban.com" title="幻灯片8" data-id="8" target="_blank" class="slideLazy clickImg"><img src="/uploads/allimg/170812/1-1FQ2143HS18.jpg" alt="幻灯片8" width="1200" height="320"><div class="banner-stress"><div class="banner-stress-bg"></div>幻灯片8</div></a></div>'
# ret = re.findall(r'<img.*?(?<=src=")(.+?(?:\.jpg))(?=")',ws.text,re.M)
# print(ret)
# print(len(ret))
# # print(len(set(ret)))
# p=[]
# for i in ret:
#     if i.startswith("http"):
#         p.append(i)
#     else:
#         j = "https://www.sucai8.com"+i
#         p.append(j)
# print(p)
# print(len(p))
#
# import re
# import requests
#ret1 = requests.get("https://www.bilibili.com/")
# ret1 = requests.get("https://www.sucai8.com")
# a = re.findall(r"(?:src=\")(.*?\.(?:jpg))",ret1.text)
# print(a)
# print(len(a))
#
# print(set(ret)^set(a))
# b = re.findall(r"(?:\<a)")
# for i in range(len(a)):
#     try:
#         ret = requests.get(url = f"{a[i]}")
#         print(ret.url)
#         # with open(f"E:/hxli/{i}.jpg","wb") as f:
#         #     f.write(ret.content)
#     except Exception as af:
#         ret = requests.get(url = f"http:{a[i]}")
#         print(ret.url)
        # with open(f"E:/hxli/{i}.jpg","wb") as f:
        #     f.write(ret.content)

# Pass 1: collect the targets of anchors written exactly as `<a href="...">`
# (href must directly follow `<a `) and turn them into absolute URLs.
b = re.findall(r"(?:<a href=\")(.+?)(?:\")", ret.text)
p = []
for href in b:
    # `(.+?)` captures at least one character, so indexing href[0] is safe.
    if href.startswith("//"):
        # Protocol-relative link: it names its own host, only the scheme is missing.
        p.append("https:" + href)
    elif href[0] == "/":
        # Site-relative path: it already begins with "/", so the base must not
        # end with one, otherwise the result is "https://www.sucai8.com//...".
        p.append("https://www.sucai8.com" + href)
    elif href[0] == "h":
        # Kept verbatim. NOTE(review): this first-letter test is presumably meant
        # to select http(s) URLs, but it also admits any href starting with "h".
        p.append(href)
    # Hrefs starting with anything else are skipped.

# Report how many distinct links were kept.
print(len(set(p)))

# Pass 2: same goal with a looser pattern. It matches "<a", then lazily skips
# characters on the same line until it stands right after `href="`, and
# captures a value that starts with "/" or "h" up to the next double quote.
link_pattern = re.compile(r'(?:<a.*?(?<=href=\"))([/h].*?)(?=\")', re.M)
ret2 = link_pattern.findall(ret.text)

# Print the number of distinct captured href values.
unique_links = set(ret2)
print(len(unique_links))